This is the companion R Markdown document to the following presentations that were delivered in Winter 2015:
Adding the TIQ-TEST functions
## Some limitations from not being an R package: Setting the Working directory
# setwd() returns the previous working directory; it is kept in `current.dir`
# so the data path below is resolved from where this document started.
tiqtest.dir <- file.path("..", "tiq-test")
current.dir <- setwd(tiqtest.dir)
source("tiq-test.R")
## Setting the root data path to where it should be in this repo
.tiq.data.setRootPath(file.path(current.dir, "data"))
## INFO [2015-01-29 16:31:55 PST] pid=11961 tiq.data.setRootPath: Setting path to '/Users/alexcp/src/tiq-test-Winter2015/data'
We have roughly 2 months of data available on this public dataset:
# List the dates for which raw "public_outbound" data is available.
print(tiq.data.getAvailableDates("raw", "public_outbound"))
## [1] "20141001" "20141002" "20141003" "20141004" "20141005" "20141006"
## [7] "20141007" "20141008" "20141009" "20141010" "20141011" "20141012"
## [13] "20141013" "20141014" "20141015" "20141016" "20141017" "20141018"
## [19] "20141019" "20141020" "20141021" "20141022" "20141023" "20141024"
## [25] "20141025" "20141026" "20141027" "20141028" "20141029" "20141030"
## [31] "20141031" "20141101" "20141102" "20141103" "20141104" "20141105"
## [37] "20141106" "20141107" "20141108" "20141109" "20141110" "20141111"
## [43] "20141112" "20141113" "20141114" "20141115" "20141116" "20141117"
## [49] "20141118" "20141119" "20141120" "20141121" "20141122" "20141123"
## [55] "20141124" "20141125" "20141126" "20141127" "20141128" "20141129"
## [61] "20141130"
# List the dates for which raw "public_inbound" data is available.
print(tiq.data.getAvailableDates("raw", "public_inbound"))
## [1] "20141001" "20141002" "20141003" "20141004" "20141005" "20141006"
## [7] "20141007" "20141008" "20141009" "20141010" "20141011" "20141012"
## [13] "20141013" "20141014" "20141015" "20141016" "20141017" "20141018"
## [19] "20141019" "20141020" "20141021" "20141022" "20141023" "20141024"
## [25] "20141025" "20141026" "20141027" "20141028" "20141029" "20141030"
## [31] "20141031" "20141101" "20141102" "20141103" "20141104" "20141105"
## [37] "20141106" "20141107" "20141108" "20141109" "20141110" "20141111"
## [43] "20141112" "20141113" "20141114" "20141115" "20141116" "20141117"
## [49] "20141118" "20141119" "20141120" "20141121" "20141122" "20141123"
## [55] "20141124" "20141125" "20141126" "20141127" "20141128" "20141129"
## [61] "20141130"
This time, we also have a couple of private data feeds covering part of the same period, but their contents cannot be shared publicly as part of this release:
# Only list the private feed's dates when that dataset is present locally.
if (tiq.data.isDatasetAvailable("raw", "private1")) {
  print(tiq.data.getAvailableDates("raw", "private1"))
}
## [1] "20141001" "20141002" "20141004" "20141005" "20141006" "20141007"
## [7] "20141008" "20141009" "20141010" "20141011" "20141012" "20141013"
## [13] "20141014" "20141015" "20141016" "20141017" "20141018" "20141019"
## [19] "20141020" "20141021" "20141022" "20141023" "20141024" "20141025"
## [25] "20141026" "20141027" "20141028" "20141029" "20141030" "20141031"
## [31] "20141101" "20141102" "20141103" "20141104" "20141105" "20141106"
## [37] "20141107" "20141108" "20141109" "20141110" "20141111" "20141112"
## [43] "20141113" "20141114" "20141115" "20141116" "20141117" "20141118"
## [49] "20141119" "20141120" "20141121" "20141122" "20141123" "20141124"
## [55] "20141125" "20141126" "20141127" "20141128" "20141129" "20141130"
This is an example of “RAW” (not enriched) outbound data imported from combine output
# Load the raw public outbound indicators for 20141101 and display a fixed
# subset of their columns.
outbound.ti <- tiq.data.loadTI("raw", "public_outbound", "20141101")
outbound.ti[, list(entity, type, direction, source, date)]
## entity type direction source date
## 1: 1.168.15.140 IPv4 outbound alienvault 2014-11-01
## 2: 1.93.6.86 IPv4 outbound alienvault 2014-11-01
## 3: 100.42.211.4 IPv4 outbound alienvault 2014-11-01
## 4: 101.227.172.24 IPv4 outbound alienvault 2014-11-01
## 5: 101.36.81.55 IPv4 outbound alienvault 2014-11-01
## ---
## 11388: up.frigo2000.it FQDN outbound zeus 2014-11-01
## 11389: update.odeen.eu FQDN outbound zeus 2014-11-01
## 11390: update.rifugiopontese.it FQDN outbound zeus 2014-11-01
## 11391: vahendkarasis4.com FQDN outbound zeus 2014-11-01
## 11392: welcahllyn.com FQDN outbound zeus 2014-11-01
We can use the same loadTI function to also gather the enriched datasets:
# Load the enriched version of the same feed, drop its `notes` column and
# show the last rows.
enrich.ti <- tiq.data.loadTI("enriched", "public_outbound", "20141101")
enrich.ti <- enrich.ti[, notes := NULL]
tail(enrich.ti)
## entity type direction source date asnumber
## 1: 94.102.63.153 IPv4 outbound zeus 2014-11-01 29073
## 2: 94.103.36.55 IPv4 outbound zeus 2014-11-01 47894
## 3: 95.163.121.12 IPv4 outbound zeus 2014-11-01 12695
## 4: 98.131.185.136 IPv4 outbound zeus 2014-11-01 32392
## 5: 98.131.185.136 IPv4 outbound zeus 2014-11-01 32392
## 6: 99.181.5.83 IPv4 outbound zeus 2014-11-01 7018
## asname country host
## 1: Ecatel Network NL NA
## 2: VeriTeknik Bilisim Ltd. TR NA
## 3: Digital Networks CJSC RU NA
## 4: Ecommerce Corporation US NA
## 5: Ecommerce Corporation US projects.globaltronics.net
## 6: AT&T Services, Inc. US NA
## rhost
## 1: exadomains.net
## 2: datacenter.veriteknik.com
## 3: NA
## 4: NA
## 5: NA
## 6: adsl-99-181-5-83.dsl.irvnca.sbcglobal.net
This specific outbound dataset has the following sources included:
# Distinct feed names present in the raw outbound data for 20141101.
outbound.ti <- tiq.data.loadTI("raw", "public_outbound", "20141101")
unique(outbound.ti[["source"]])
## [1] "alienvault" "feodo" "malcode"
## [4] "malcode_zones" "malwaredomainlist" "malwaredomains"
## [7] "malwaregroup" "palevotracker" "spyeye"
## [10] "sslbl" "zeus"
We can do the same for the inbound data we have to see the sources we have available:
# Distinct feed names present in the raw inbound data for 20141101.
inbound.ti <- tiq.data.loadTI("raw", "public_inbound", "20141101")
unique(inbound.ti[["source"]])
## [1] "alienvault" "autoshun" "blocklistde"
## [4] "botscout" "bruteforceblocker" "charleshaley"
## [7] "ciarmy" "dragonresearch" "dshield"
## [10] "honeypot" "openbl" "packetmail"
## [13] "virbl"
Here are some results of running the Novelty test on the inbound data:
# Novelty test on four selected inbound sources over 20141001-20141130,
# followed by its plot.
inbound.novelty <- tiq.test.noveltyTest(
  "public_inbound", "20141001", "20141130",
  select.sources = c("alienvault", "blocklistde", "dshield", "charleshaley"),
  .progress = FALSE
)
tiq.test.plotNoveltyTest(
  inbound.novelty,
  title = "Novelty Test - Inbound Indicators"
)
And results running on the outbound data:
# Novelty test on four selected outbound sources over 20141001-20141130,
# followed by its plot.
outbound.novelty <- tiq.test.noveltyTest(
  "public_outbound", "20141001", "20141130",
  select.sources = c("alienvault", "malwaregroup", "malcode", "zeus"),
  .progress = FALSE
)
tiq.test.plotNoveltyTest(
  outbound.novelty,
  title = "Novelty Test - Outbound Indicators"
)
This is an example of applying the Overlap Test to our inbound dataset
# Overlap test across all sources of the enriched inbound data for 20141101.
overlap <- tiq.test.overlapTest(
  "public_inbound", "20141101", "enriched",
  select.sources = NULL
)
overlap.plot <- tiq.test.plotOverlapTest(
  overlap,
  title = "Overlap Test - Inbound Data - 20141101"
)
print(overlap.plot)
Similarly, an example applying the Overlap Test to the outbound dataset
# Overlap test across all sources of the enriched outbound data for 20141101.
overlap <- tiq.test.overlapTest(
  "public_outbound", "20141101", "enriched",
  select.sources = NULL
)
overlap.plot <- tiq.test.plotOverlapTest(
  overlap,
  title = "Overlap Test - Outbound Data - 20141101"
)
print(overlap.plot)
With the population data we can generate plots that compare, by country, the top quantities of reported IP addresses on a specific date:
# Extract per-country populations from the outbound and inbound feeds for
# 20141111 without splitting by source, load the "mmgeo" country population,
# and plot the three together.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved constant.
outbound.pop <- tiq.test.extractPopulationFromTI(
  "public_outbound", "country",
  date = "20141111",
  select.sources = NULL, split.ti = FALSE
)
inbound.pop <- tiq.test.extractPopulationFromTI(
  "public_inbound", "country",
  date = "20141111",
  select.sources = NULL, split.ti = FALSE
)
# NOTE(review): the recorded warnings below report that no dates were found
# for the "mmgeo" population, so `complete.pop` may be empty in this run;
# confirm the population data is present under the data root.
complete.pop <- tiq.data.loadPopulation("mmgeo", "country")
## Warning in max(tiq.data.getAvailableDates(category, group)): no
## non-missing arguments, returning NA
## WARN [2015-01-29 16:33:36 PST] pid=11961 tiq.data.loadTI: path '/Users/alexcp/src/tiq-test-Winter2015/data/population/mmgeo/NA.csv.gz' is invalid. No data available on date 'NA'.
tiq.test.plotPopulationBars(c(inbound.pop, outbound.pop, complete.pop), "country")
# Aging test over 20141001-20141130 for each public feed, each followed by
# its plot.
outbound.aging <- tiq.test.agingTest("public_outbound", "20141001", "20141130")
tiq.test.plotAgingTest(outbound.aging)

inbound.aging <- tiq.test.agingTest("public_inbound", "20141001", "20141130")
tiq.test.plotAgingTest(inbound.aging)
# Aging test on the public outbound feed again, this time with split.ti
# disabled. `FALSE` replaces `F`, which is a reassignable variable rather
# than a constant.
outbound.aging <- tiq.test.agingTest(
  "public_outbound", "20141001", "20141130",
  split.ti = FALSE
)
tiq.test.plotAgingTest(outbound.aging)
# Aging test on the private1 feed with split.ti disabled. `FALSE` replaces
# `F`, which is a reassignable variable rather than a constant.
# NOTE(review): unlike the private1 date listing earlier in this document,
# this call is not guarded by tiq.data.isDatasetAvailable(); confirm whether
# it should be, since the private data is not part of the public release.
private.aging <- tiq.test.agingTest(
  "private1", "20141001", "20141130",
  split.ti = FALSE
)
## WARN [2015-01-29 16:37:24 PST] pid=11961 tiq.data.loadTI: path '/Users/alexcp/src/tiq-test-Winter2015/data/enriched/private1/20141003.csv.gz' is invalid. No data available on date '20141003'.
tiq.test.plotAgingTest(private.aging, density.limit = 0.7)
# Per-country populations of the public outbound and private1 feeds on
# 20141110, extracted with split.ti disabled and plotted side by side.
# `FALSE` replaces `F`, which is a reassignable variable rather than a
# constant.
outbound.pop <- tiq.test.extractPopulationFromTI(
  "public_outbound", "country",
  date = "20141110",
  select.sources = NULL, split.ti = FALSE
)
private.pop <- tiq.test.extractPopulationFromTI(
  "private1", "country",
  date = "20141110",
  select.sources = NULL, split.ti = FALSE
)
tiq.test.plotPopulationBars(
  c(private.pop, outbound.pop), "country",
  title = "Comparing Private1 and Public Feeds on 20141110"
)
# Novelty test on the private1 feed over 20141001-20141130, then its plot.
# `FALSE` replaces `F`, which is a reassignable variable rather than a
# constant.
# NOTE(review): the argument is spelled `split.tii` here while other calls in
# this document use `split.ti`; the spelling is left untouched because the
# document rendered with it. Confirm against tiq.test.noveltyTest's signature.
private.novelty <- tiq.test.noveltyTest(
  "private1", "20141001", "20141130",
  split.tii = FALSE,
  .progress = FALSE
)
## WARN [2015-01-29 16:37:32 PST] pid=11961 tiq.data.loadTI: path '/Users/alexcp/src/tiq-test-Winter2015/data/raw/private1/20141003.csv.gz' is invalid. No data available on date '20141003'.
tiq.test.plotNoveltyTest(private.novelty)
## Warning: Stacking not well defined when ymin != 0
# Novelty test on the public outbound feed over 20141001-20141130, then its
# plot. `FALSE` replaces `F`, which is a reassignable variable rather than a
# constant.
# NOTE(review): the argument is spelled `split.tii` here while other calls in
# this document use `split.ti`; the spelling is left untouched because the
# document rendered with it. Confirm against tiq.test.noveltyTest's signature.
outbound.novelty <- tiq.test.noveltyTest(
  "public_outbound", "20141001", "20141130",
  split.tii = FALSE,
  .progress = FALSE
)
tiq.test.plotNoveltyTest(outbound.novelty)
## Warning: Stacking not well defined when ymin != 0
This is an example of applying the Overlap Test to our inbound dataset
# Overlap test across all sources of the enriched inbound data for 20141101.
overlap <- tiq.test.overlapTest(
  "public_inbound", "20141101", "enriched",
  select.sources = NULL
)
overlap.plot <- tiq.test.plotOverlapTest(
  overlap,
  title = "Overlap Test - Inbound Data - 20141101"
)
print(overlap.plot)
# Overlap test between the public outbound and private1 groups on the
# enriched data for 20141101, with split.ti disabled, then its plot.
# `FALSE` replaces `F`, which is a reassignable variable rather than a
# constant.
overlap <- tiq.test.overlapTest(
  c("public_outbound", "private1"), "20141101", "enriched",
  split.ti = FALSE, select.sources = NULL
)
tiq.test.plotOverlapTest(
  overlap,
  title = "OVERLAP - public_outbound VS private1 - 20141101"
)